/** @file    stringop.c
 ** @author  Andrea Vedaldi
 ** @brief   String Operations - Definition
 **/

/* AUTORIGHTS
Copyright (C) 2007-10 Andrea Vedaldi and Brian Fulkerson

This file is part of VLFeat, available under the terms of the
GNU GPLv2, or (at your option) any later version.
*/

/** @file   stringop.h
 ** @author Andrea Vedaldi
 ** @brief  String Operations

@ref stringop.h implements basic string operations. All functions that
write to strings use range checking, which makes them safer than some
standard POSIX equivalent (see @ref vl-stringop-err).

@secton vl-stringop-file-protocols File protocols

::stringop.h defines a few file "protocols" and helps parsing them
from URL-like formatted strings. The supported protocols are:

<table>
<caption>File protocols</caption>
<th><td>Protocol</td><td>Code</td><td>URL prefix</td></th>
<tr><td>ASCII</td><td>::VL_PROT_ASCII</td><td><code>ascii://</code></td></tr>
<tr><td>BINARY</td><td>::VL_PROT_BINARY</td><td><code>binary://</code></td></tr>
</table>

@section vl-stringop-err Detecting overflow

::stringop.h functions that write a string to a character buffer take
both the buffer and its size @c n as input. If @c n is not large
enough, the output may be truncated but it is always a null terminated
string (provided that @c n &geq; 1). Such functions also return the
length of the string that would have been written @c r (which does not
include the terminating null character) had the buffer been large
enough.  Hence an <em>overflow</em> can be detected by testing if @c r
&gt;= @c n, @c r can be used to re-allocate a buffer large enough to
contain the result, and the operation can be repeated.

**/

#include "stringop.h"

#include <string.h>
#include <ctype.h>

/** ------------------------------------------------------------------
 ** @brief Extract the protocol prefix from a string
 ** @param string string.
 ** @param protocol protocol code (output).
 ** @return pointer to the first character after the protocol prefix.
 **
 ** The function extracts the prefix of the string @a string
 ** terminated by the first occurrence of the @c :// substring (if
 ** any). It then matches the suffix terminated by @c :// to the
 ** supported @ref vl-file-protocols protocols.  If @c protocol is not
 ** @c NULL, the corresponding protocol code is written to @a protocol
 **
 ** The function writes to @a protocol the value ::VL_PROT_NONE if no
 ** suffix is detected and ::VL_PROT_UNKNOWN if there is a suffix but
 ** it cannot be matched to any of the supported protocols.
 **/

VL_EXPORT char *
vl_string_parse_protocol (char const *string, int *protocol)
{
  char const * cpt ;
  int dummy ;

  /* handle the case prot = 0 */
  if (protocol == 0)
    protocol = &dummy ;

  /* look for :// */
  cpt = strstr(string, "://") ;

  if (cpt == 0) {
    *protocol = VL_PROT_NONE ;
    cpt = string ;
  }
  else {
    if (strncmp(string, "ascii", cpt - string) == 0) {
      *protocol = VL_PROT_ASCII ;
    }
    else if (strncmp(string, "bin",   cpt - string) == 0) {
      *protocol = VL_PROT_BINARY ;
    }
    else {
      *protocol = VL_PROT_UNKNOWN ;
    }
    cpt += 3 ;
  }
  return (char*) cpt ;
}

/** ------------------------------------------------------------------
 ** @brief Get protocol name
 ** @param protocol protocol code.
 ** @return pointer protocol name string.
 **
 ** The function returns a pointer to a string containing the name of
 ** the protocol @a protocol (see the @a vl-file-protocols protocols
 ** list).  If the protocol is unknown the function returns the empty
 ** string.
 **/

VL_EXPORT char const *
vl_string_protocol_name (int protocol)
{
  switch (protocol) {
  case VL_PROT_ASCII:
    return "ascii" ;
  case VL_PROT_BINARY:
    return "bin" ;
  case VL_PROT_NONE :
    return "" ;
  default:
    return 0 ;
  }
}


/** ------------------------------------------------------------------
 ** @brief Extract base of file name
 ** @param destination destination buffer.
 ** @param destinationSize size of destination buffer.
 ** @param source input string.
 ** @param maxNumStrippedExtensions maximum number of extensions to strip.
 ** @return length of the destination string.
 **
 ** The function removes the leading path and up to @c
 ** maxNumStrippedExtensions trailing extensions from the string @a
 ** source and writes the result to the buffer @a destination.
 **
 ** The leading path is the longest suffix that ends with either the
 ** @c \ or @c / characters. An extension is a string starting with
 ** the @c . character not containing it. For instance, the string @c
 ** file.png contains the extension @c .png and the string @c
 ** file.tar.gz contains two extensions (@c .tar and @c .gz).
 **
 ** @sa vl-stringop-err.
 **/

VL_EXPORT vl_size
vl_string_basename (char * destination,
                    vl_size destinationSize,
                    char const * source,
                    vl_size maxNumStrippedExtensions)
{
  char c ;
  vl_uindex k = 0, beg, end ;

  /* find beginning */
  beg = 0 ;
  for (k = 0 ; (c = source[k]) ; ++ k) {
    if (c == '\\' || c == '/') beg = k + 1 ;
  }

  /* find ending */
  end = strlen (source) ;
  for (k = end ; k > beg ; --k) {
    if (source[k - 1] == '.' && maxNumStrippedExtensions > 0) {
      -- maxNumStrippedExtensions ;
      end = k - 1 ;
    }
  }

  return vl_string_copy_sub (destination, destinationSize,
                             source + beg, source + end) ;
}

/** -------------------------------------------------------------------
 ** @brief Replace wildcard characters by a string
 ** @param destination output buffer.
 ** @param destinationSize size of the output buffer.
 ** @param source input string.
 ** @param wildcardChar wildcard character.
 ** @param escapeChar escape character.
 ** @param replacement replacement string.
 **
 ** The function replaces the occurrence of the specified wildcard
 ** character @a wildcardChar by the string @a replacement. The result
 ** is written to the buffer @a destination of size @a
 ** destinationSize.
 **
 ** Wildcard characters may be escaped by preceding them by the @a esc
 ** character. More in general, anything following an occurrence of @a
 ** esc character is copied verbatim. To disable the escape characters
 ** simply set @a esc to 0.
 **
 ** @return length of the result.
 ** @sa @ref vl-stringop-err.
 **/

VL_EXPORT vl_size
vl_string_replace_wildcard (char * destination,
                            vl_size destinationSize,
                            char const * source,
                            char wildcardChar,
                            char escapeChar,
                            char const * replacement)
{
  char c ;
  vl_uindex k = 0 ;
  vl_bool escape = 0 ;

  while ((c = *source++)) {

    /* enter escape mode ? */
    if (! escape && c == escapeChar) {
      escape = 1 ;
      continue ;
    }

    /* wildcard or regular? */
    if (! escape && c == wildcardChar) {
      char const * repl = replacement ;
      while ((c = *repl++)) {
        if (destination && k + 1 < destinationSize) {
          destination[k] = c ;
        }
        ++ k ;
      }
    }
    /* regular character */
    else {
      if (destination && k + 1 < destinationSize) {
        destination[k] = c ;
      }
      ++ k ;
    }
    escape = 0 ;
  }

  /* add trailing 0 */
  if (destinationSize > 0) {
    destination[VL_MIN(k, destinationSize - 1)] = 0 ;
  }
  return  k ;
}

/** -------------------------------------------------------------------
 ** @brief Copy string
 ** @param destination output buffer.
 ** @param destinationSize size of the output buffer.
 ** @param source string to copy.
 ** @return length of the source string.
 **
 ** The function copies the string @a source to the buffer @a
 ** destination of size @a destinationSize.
 **
 ** @sa vl-stringop-err.
 **/

VL_EXPORT vl_size
vl_string_copy (char * destination, vl_size destinationSize,
                char const * source)
{
  char c ;
  vl_uindex k = 0 ;

  while ((c = *source++)) {
    if (destination && k + 1 < destinationSize) {
      destination[k] = c ;
    }
    ++ k ;
  }

  /* finalize */
  if (destinationSize > 0) {
    destination[VL_MIN(k, destinationSize - 1)] = 0 ;
  }
  return  k ;
}

/** -------------------------------------------------------------------
 ** @brief Copy substring
 ** @param destination output buffer.
 ** @param destinationSize  size of output buffer.
 ** @param beginning start of the substring.
 ** @param end end of the substring.
 ** @return length of the destination string.
 **
 ** The function copies the substring from at @a beginning to @a end
 ** (not included) to the buffer @a destination of size @a
 ** destinationSize. If, however, the null character is found before
 ** @a end, the substring terminates there.
 **
 ** @sa vl-stringop-err.
 **/

VL_EXPORT vl_size
vl_string_copy_sub (char * destination,
                    vl_size destinationSize,
                    char const * beginning,
                    char const * end)
{
  char c ;
  vl_uindex k = 0 ;

  while (beginning < end && (c = *beginning++)) {
    if (destination && k + 1 < destinationSize) {
      destination[k] = c ;
    }
    ++ k ;
  }

  /* finalize */
  if (destinationSize > 0) {
    destination[VL_MIN(k, destinationSize - 1)] = 0 ;
  }
  return  k ;
}

/** ------------------------------------------------------------------
 ** @brief Search character in reversed order
 ** @param beginning pointer to the substring beginning.
 ** @param end pointer to the substring end.
 ** @param c character to search for.
 ** @return pointer to last occurrence of @a c, or 0 if none.
 **
 ** The function searches for the last occurrence of the character @a c
 ** in the substring from @a beg to @a end (the latter not being included).
 **/

VL_EXPORT char *
vl_string_find_char_rev (char const *beginning, char const* end, char c)
{
  while (end -- != beginning) {
    if (*end == c) {
      return (char*) end ;
    }
  }
  return 0 ;
}

/** -------------------------------------------------------------------
 ** @brief Calculate string length
 ** @param string string.
 ** @return string length.
 **/

VL_EXPORT vl_size
vl_string_length (char const *string)
{
  vl_uindex i ;
  for (i = 0 ; string[i] ; ++i) ;
  return i ;
}

/** -------------------------------------------------------------------
 ** @brief Compare strings case-insensitive
 ** @param string1 fisrt string.
 ** @param string2 second string.
 ** @return integer =,<,> 0 if @c string1 =,<,> @c string2
 **/

VL_EXPORT int
vl_string_casei_cmp (const char * string1, const char * string2)
{
  while (tolower((char unsigned)*string1) ==
         tolower((char unsigned)*string2))
  {
    if (*string1 == 0) {
      return 0 ;
    }
    string1 ++ ;
    string2 ++ ;
  }
  return
    (int)tolower((char unsigned)*string1) -
    (int)tolower((char unsigned)*string2) ;
}
